# Contributor: Carlo Landmeter <clandmeter@alpinelinux.org>
# Maintainer: Francesco Colista <fcolista@alpinelinux.org>
pkgname=tesseract-ocr
pkgver=5.3.3
# Version of the separate tessdata archive (the *.traineddata files) in source.
_tdver=4.1.0
pkgrel=1
pkgdesc="open source OCR engine"
url="https://github.com/tesseract-ocr/tesseract/releases"
# Very large package that is unlikely to ever be used on the architectures
# excluded below.
arch="all !s390x !armhf !x86 !ppc64le"
license="Apache-2.0"
# Build-time dependencies: autotools plus the -dev packages used by the build.
makedepends="
	autoconf
	automake
	cairo-dev
	icu-dev
	leptonica-dev
	libtool
	opencl-dev
	pango-dev
	"
# Per-language data subpackages are appended to this list further down.
subpackages="$pkgname-dev"
# Two archives: the tesseract sources at $pkgver and the tessdata release at
# $_tdver.
source="$pkgname-$pkgver.tar.gz::https://github.com/tesseract-ocr/tesseract/archive/$pkgver.tar.gz
	https://github.com/tesseract-ocr/tessdata/archive/$_tdver/tessdata-$_tdver.tar.gz
	"
builddir="$srcdir"/tesseract-$pkgver
options="!check" # TODO: tests are not run for this package yet

# Language codes to package. For each code, package() installs
# <code>.traineddata and a matching data subpackage is registered below.
_langs="
	afr
	ara
	aze
	bel
	ben
	bul
	cat
	ces
	chi_sim
	chi_tra
	chr
	dan
	deu
	eng
	enm
	epo
	equ
	est
	eus
	fin
	fra
	frk
	frm
	glg
	grc
	heb
	hin
	hrv
	hun
	ind
	isl
	ita
	ita_old
	jpn
	kan
	kat
	khm
	kor
	lav
	lit
	mal
	mkd
	mlt
	msa
	nld
	nor
	osd
	pol
	por
	ron
	rus
	slk
	slv
	spa
	spa_old
	sqi
	srp
	swa
	swe
	tam
	tel
	tgl
	tha
	tur
	ukr
	vie
	"

# Register one noarch "$pkgname-data-<code>" subpackage per language code;
# every one of them is split off by _lang_data().
for _lang in $_langs; do
	subpackages="${subpackages} ${pkgname}-data-${_lang}:_lang_data:noarch"
done

# Run the default preparation step, then upstream's autogen.sh from the
# source tree (build() expects ./configure to exist afterwards).
prepare() {
	default_prepare
	# GIT_DIR is set for this single command only.
	# NOTE(review): presumably this keeps git calls made during autogen from
	# picking up an enclosing repository -- confirm against autogen.sh.
	GIT_DIR=. ./autogen.sh
}

# Configure the tree, then compile the default target followed by the
# separate "training" target.
build() {
	# Build/host triplets first, then the install layout, then feature flags.
	./configure \
		--build=${CBUILD} \
		--host=${CHOST} \
		--prefix=/usr \
		--sysconfdir=/etc \
		--localstatedir=/var \
		--mandir=/usr/share/man \
		--infodir=/usr/share/info \
		--enable-opencl \
		--disable-static

	make
	# "training" is its own make target and is not built by the plain make.
	make training
}

# Install the built program and training tools into $pkgdir, then copy one
# <code>.traineddata file per entry of $_langs from the tessdata archive.
package() {
	local lang
	local tessdata_dir="$pkgdir"/usr/share/tessdata

	make DESTDIR="$pkgdir" install
	make DESTDIR="$pkgdir" training-install

	# install -D creates the target directory on first use.
	for lang in $_langs; do
		install -Dm644 -t "$tessdata_dir" \
			"$srcdir"/tessdata-$_tdver/$lang.traineddata
	done
}

# Split function shared by every "$pkgname-data-<code>" subpackage: derives
# the language code from $subpkgname, sets the subpackage metadata and moves
# that language's data files into the subpackage.
_lang_data() {
	local code
	# Whatever follows "$pkgname-data-" in the subpackage name is the code.
	code=${subpkgname#"$pkgname"-data-}

	depends="${pkgname}=${pkgver}-r${pkgrel}"
	pkgdesc="Tesseract language data for ${code}"
	# The "<code>." prefix keeps e.g. "ita" from also matching "ita_old".
	amove usr/share/tessdata/${code}.*
}

# SHA-512 checksums, one per archive listed in source.
sha512sums="
c04ae68ac4ecf85243c54feb4233e282cd420522588fd4b3eaa87619cb236a575052e3667a806c2f56de06dc013b88926c2dbea4cb4ee02f0119c032598169f2  tesseract-ocr-5.3.3.tar.gz
c0b55fb5542d25ebd4b56d25155cc8254027c9503af298641686388886403ee26ebf9ef47d21d530d372deeba6a01f6eb17705e1ab46bb02f9f7bf4f63805cd0  tessdata-4.1.0.tar.gz
"
